{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import re\n",
    "import requests\n",
    "import io\n",
    "import yaml\n",
    "import gspread\n",
    "\n",
    "from glob import glob\n",
    "from os.path import exists, join, basename\n",
    "from bs4 import BeautifulSoup\n",
    "from shutil import copyfileobj, copyfile\n",
    "from PIL import Image\n",
    "from time import sleep\n",
    "from requests_ip_rotator import ApiGateway, EXTRA_REGIONS, ALL_REGIONS\n",
    "from tqdm import tqdm"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Parse Issues"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "metadata": {},
   "outputs": [],
   "source": [
    "repos = []\n",
    "descriptions = []\n",
    "\n",
    "for f in glob('./github-issues/*.txt'):\n",
    "    with open(f) as fp:\n",
    "        lines = [l.rstrip('\\n') for l in fp.readlines()]\n",
    "        \n",
    "        cur_repo = lines[0]\n",
    "        cur_description = ''\n",
    "        is_appending = False\n",
    "        \n",
    "        for l in lines:\n",
    "            if l == '-' * 138:\n",
    "                if is_appending:\n",
    "                    repos.append(cur_repo)\n",
    "                    descriptions.append(cur_description)\n",
    "\n",
    "                cur_description = ''\n",
    "                is_appending = True\n",
    "                continue\n",
    "            \n",
    "            if is_appending:\n",
    "                cur_description += l + '\\n'\n",
    "        \n",
    "        if is_appending:\n",
    "            repos.append(cur_repo)\n",
    "            descriptions.append(cur_description)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save to a csv\n",
    "df = pd.DataFrame({'repo': repos, 'description': descriptions})\n",
    "df.to_csv('./issues.csv', index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "gam",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
